
/* THIS DO-FILE WAS WRITTEN IN MAY 2019 BY MARGHERITA COMOLA,
IT RUNS THE SIMULATIONS FOR THE INTEGRATED MEAN SQUARED ERROR OF THE TOTAL TREATMENT 
EFFECT FOR THE ERDOS-RENYI NETWORK FORMATION PROCESS
(`Treatment Effects Accounting for Network Changes', Comola Prina, REStat) */


/********************************************************************************************
*********************************************************************************************
1) SETTINGS
*********************************************************************************************
*********************************************************************************************/

clear all
set more off

/* observations */
global obs="50"
global nall=$obs^2

/* number of simulations*/
global simul = "100"

/* number of draws*/
global draw = "100"

/* density of binary network at baseline */
global density = "0.1"

/* intervention-driven network changes: increase in link probability at endline
[takes values: 0, 0.02, 0.05, 0.1] */
global ntk_change="0.05"

/* measurement error, in % points [takes values: 1, 5 or 10] */
global prob_switch="1"

/* model's parameters */
global gamma="10"
global beta1="0.5"
global beta2="0.2"

/* variance of the error term in each of the two PE equations */
global var_PE="0.5"

/********************************************************************************************
*********************************************************************************************
2) TTE FUNCTIONS FOR STATIC AND DYNAMIC MODELS
*********************************************************************************************
*********************************************************************************************/

mata

/* STATIC MODEL */ 

function static_TE (A, B, C)
{
A1 = J($obs, $obs, . )
for (i=1; i<=$obs; i++) {
e= J($obs,1,0)
e[i]=1
A1[.,i]=(luinv(I($obs)-A*B))*(C*e)
}
return (A1)
}

end

/* DYNAMIC MODEL */ 

mata

function dynamic_TE (A, B, C, D, E, F, I)

{

BLOCK=I*J($obs, $obs, 1)
A2 = J($obs, $obs, . )

for (i=1; i<=$obs; i++) {

e= J($obs,1,0)
e[i]=1

DER_G=J($obs, $obs, 0)
DER_G[i,.]=BLOCK[i,.]
DER_G[.,i]=BLOCK[.,i]

S=luinv(I($obs)-A*B-C*D)

M=E*F

DER_S=S*C*DER_G*S

DER_M=E*e

A2[.,i]=DER_S*M+S*DER_M

}
return (A2)

}

end

/*********************************************************************************************
*********************************************************************************************
3) PROGRAM 
*********************************************************************************************
*********************************************************************************************/

capture program drop simulation_ComolaPrina_mse1
program define simulation_ComolaPrina_mse1
drop _all

/****************************************************************************
* GENERATE THE DATA
****************************************************************************/

/* DYADIC DATASET */

set obs $obs
gen id=_n

/* treatment */

gen temp=runiform()
gen itt=(temp>=0.5)
drop temp

/* error term in PE equations */

gen er0=rnormal(0,$var_PE)
gen er1=rnormal(0,$var_PE)

mata: ITT= st_data(., "itt")
mata: er0 = st_data(., "er0")
mata: er1 = st_data(., "er1")

drop er0 er1

sort id
save simul_ComolaPrina_mse.dta, replace

sort id 
foreach i in id itt {
	rename `i' `i'_j
}

qui sum
save jjj.dta, replace

foreach i in id itt {
	rename `i'_j `i'_i
}

cross using jjj.dta
capture erase jjj.dta

order id_i id_j itt_i itt_j

/* BINARY NETWORKS */ 

preserve
clear

/* binary network at baseline */

/* Erdős–Rényi random graph*/
nwrandom $obs, prob($density) undirected

stack net1- net$obs, into (link0)
drop _stack
gen serial=_n
sort serial
save temp1.dta, replace
restore

/* intervention-driven network change */

preserve
clear

/* Erdős–Rényi random graph*/
nwrandom $obs, prob($density+$ntk_change) undirected

stack net1- net$obs, into (link_temp)
drop _stack
gen serial=_n
sort serial
save temp2.dta, replace
restore

sort id_i id_j
gen serial=_n

sort serial
merge serial using temp1.dta
drop _merge

sort serial
merge serial using temp2.dta
drop _merge serial


/* binary network at endline */

gen link1=link0
gen temp=$ntk_change
replace link1=link_temp if (itt_i==1 | itt_j==1) & (id_i!= id_j) & temp!=0
drop temp link_temp

/* measurement error 
need to be symmetric --> lower triangular matrix */

preserve

keep id_i id_j link0 link1
drop if id_i==id_j
drop if id_i>id_j

shufflevar link0 link1

/* switch indicator */

local treshold r(p$prob_switch)

gen random1=uniform() 
qui sum random1, detail
gen switch1=(random1<`treshold') 

gen random2=uniform() 
qui sum random2, detail
gen switch2=(random2<`treshold') 

drop link0 link1 random1 random2
ren id_i id1
ren id_j id2
sort id1 id2

save temp3.dta, replace

restore

/* re-merge and symmetrize */

ren id_i id1
ren id_j id2

sort id1 id2 
merge id1 id2 using temp3.dta
drop _merge

ren id1 id_i
ren id2 id_j 

ren id_j id1
ren id_i id2

ren switch1 switch1A
ren switch2 switch2A
ren link0_shuffled link0_shuffledA
ren link1_shuffled link1_shuffledA

sort id1 id2 
merge id1 id2 using temp3.dta
drop _merge

ren id1 id_j
ren id2 id_i 

replace switch1= switch1A if switch1==.
replace switch1= 0 if switch1==.
drop switch1A

replace switch2= switch2A if switch2==.
replace switch2= 0 if switch2==.
drop switch2A

replace link0_shuffled= link0_shuffledA if link0_shuffled==.
replace link0_shuffled= 0 if link0_shuffled==.
drop link0_shuffledA

replace link1_shuffled= link1_shuffledA if link1_shuffled==.
replace link1_shuffled= 0 if link1_shuffled==.
drop link1_shuffledA

sort id_i id_j

capture erase temp1.dta
capture erase temp2.dta
capture erase temp3.dta

/* add measurement errors to binary networks */

replace link0=link0_shuffled if switch1==1
replace link1=link1_shuffled if switch2==1

/* rearrange */

drop link0_shuffled link1_shuffled switch1 switch2
sort id_i id_j

mata: G0 = st_data(., "link0")
mata: G0 = colshape(G0, $obs) 

mata: G1 = st_data(., "link1")
mata: G1 = colshape(G1, $obs) 

/* semi row standardization */

mata: G0_S=rowsum(G0,.)
mata: G0=G0:/G0_S

mata: G1_S=rowsum(G1,.)
mata: G1=G1:/G1_S

mata: mata drop G0_S G1_S

/* edit missing */

mata: G0=editmissing(G0, 0)
mata: G1=editmissing(G1, 0)

/* build delta_G */

mata: deltaG=G1-G0

mata: st_matrix("G0", G0)
mata: st_matrix("G1", G1)
mata: st_matrix("deltaG", deltaG)

/* re-import into Stata*/

preserve

clear
svmat G0
stack G01-G0$obs, into (G0_srs)
gen serial=_n
sort serial 
drop _stack
save temp0.dta, replace 

clear
svmat G1
stack G11-G1$obs, into (G1_srs)
gen serial=_n
sort serial 
drop _stack
save temp1.dta, replace 

clear
svmat deltaG
stack deltaG1-deltaG$obs, into (deltaG_srs)
gen serial=_n
sort serial 
drop _stack
save temp2.dta, replace 

restore

gen serial=_n

sort serial 
merge serial using temp0
drop _merge
capture erase temp0.dta

sort serial 
merge serial using temp1
drop _merge
capture erase temp1.dta

sort serial 
merge serial using temp2
drop serial _merge
capture erase temp2.dta

/* save estimated theta1_hat from srs dyadic regression */

gen some_itt=(itt_i==1 | itt_j==1)
reg deltaG_srs some_itt if id_i!= id_j

matrix coeffs_dyadic=e(b)
matrix theta1_hat=coeffs_dyadic[1,1]

mata: theta1_hat=st_matrix("theta1_hat")

/* vectors of outcomes */ 

mata: A=I($obs)-$beta1*G0
mata: y0=lusolve(A,er0)

mata: A=I($obs)-$beta1*G0-$beta2*deltaG
mata: B=$gamma*ITT+er1
mata: y1=lusolve(A,B)
mata: mata drop A B

/*vectors of interest*/

mata: deltay=y1-y0
mata: G0_y0=G0*y0
mata: G0_y1=G0*y1
mata: G0_deltay=G0*deltay
mata: deltaG_y1=deltaG*y1

/*IVs of second order*/

mata: IV_2_1=G0*G0*ITT
mata: IV_2_2=deltaG*deltaG*ITT
mata: IV_2_3=G0*deltaG*ITT
mata: IV_2_4=deltaG*G0*ITT

/*IVs of third order*/

mata: IV_3_1=G0*G0*G0*ITT
mata: IV_3_2=deltaG*deltaG*deltaG*ITT
mata: IV_3_3=G0*G0*deltaG*ITT
mata: IV_3_4=deltaG*deltaG*G0*ITT
mata: IV_3_5=G0*deltaG*deltaG*ITT
mata: IV_3_6=deltaG*G0*G0*ITT
mata: IV_3_7=G0*deltaG*G0*ITT
mata: IV_3_8=deltaG*G0*deltaG*ITT

mata: results=(y0, y1, deltay, G0_y0, G0_y1, G0_deltay, deltaG_y1, IV_2_1, IV_2_2, IV_2_3, IV_2_4, IV_3_1, IV_3_2, IV_3_3, IV_3_4, IV_3_5, IV_3_6, IV_3_7, IV_3_8)
mata: st_matrix("result", results)

/*re-import into Stata*/

clear
svmat result

ren result1 y0
ren result2 y1
ren result3 deltay
ren result4 G0_y0
ren result5 G0_y1
ren result6 G0_deltay
ren result7 deltaG_y1
ren result8 IV_2_1
ren result9 IV_2_2
ren result10 IV_2_3
ren result11 IV_2_4
ren result12 IV_3_1
ren result13 IV_3_2
ren result14 IV_3_3
ren result15 IV_3_4
ren result16 IV_3_5
ren result17 IV_3_6
ren result18 IV_3_7
ren result19 IV_3_8

gen id=_n
sort id
save temp.dta, replace 

use simul_ComolaPrina_mse.dta, clear
merge id using temp.dta
drop _merge
capture erase temp.dta

/****************************************************************************
* ESTIMATE THE MODELS OF INTEREST
****************************************************************************/

global IV_list="IV_2_1 IV_2_2 IV_2_3 IV_2_4 IV_3_1 IV_3_2 IV_3_3 IV_3_4 IV_3_5 IV_3_6 IV_3_7 IV_3_8"

* no PE

regress deltay itt, nocons 

matrix coeffs_OLS=e(b)
matrix gamma_OLS=coeffs_OLS[1,1]
mata: gamma_OLS=st_matrix("gamma_OLS")

* standard PE, IV

ivregress 2sls deltay itt (G0_deltay = $IV_list), nocons

matrix coeffs_stat=e(b)
matrix gamma_stat= coeffs_stat[1,2]
matrix beta1_stat= coeffs_stat[1,1]

mata: gamma_stat=st_matrix("gamma_stat")
mata: beta1_stat=st_matrix("beta1_stat")

* dynamic PE, IV

ivregress 2sls deltay itt (G0_deltay deltaG_y1 = $IV_list), nocons 

matrix coeffs_dyn=e(b)
matrix gamma_dyn= coeffs_dyn[1,3]
matrix  beta1_dyn= coeffs_dyn[1,1]
matrix  beta2_dyn= coeffs_dyn[1,2]

mata: gamma_dyn=st_matrix("gamma_dyn")
mata: beta1_dyn=st_matrix("beta1_dyn")
mata: beta2_dyn=st_matrix("beta2_dyn")

/* re-import into Stata */

mata: results=(gamma_OLS, gamma_stat, beta1_stat, gamma_dyn, beta1_dyn, beta2_dyn, theta1_hat)
mata: st_matrix("result", results)

clear
svmat result

ren result1 gamma_ols
ren result2 gamma_stat
ren result3 beta1_stat
ren result4 gamma_dyn
ren result5 beta1_dyn
ren result6 beta2_dyn
ren result7 theta1_hat

capture erase simul_ComolaPrina_mse.dta

end

/*********************************************************************************************
**********************************************************************************************
4) REPEAT THE PROCEDURE N TIMES AND SAVE THE BETAS
**********************************************************************************************
*********************************************************************************************/

/* blank data */

clear
set more off

set obs 0
gen sim=_n

save results_coeffs_$ntk_change.dta, replace

/* repeat the procedure and format appropriately */

set seed 07031979

forvalues i=1/$simul {

simulation_ComolaPrina_mse1

gen sim=`i'

save results_mse_`i'.dta, replace

use results_coeffs_$ntk_change.dta, clear
append using results_mse_`i'.dta
save results_coeffs_$ntk_change.dta, replace

erase results_mse_`i'.dta
}

egen theta1=mean(theta1_hat)
save results_coeffs_$ntk_change.dta, replace

/*********************************************************************************************
**********************************************************************************************
5) GENERATE DRAWS OF NETWORKS AND ITT - AND SAVE ESTIMATES OF ITE/DTE/TTE
OF THE STATIC AND DYNAMIC PEER EFFECT MODELS FOR EACH DRAW, FOR ALL SIMULATIONS
**********************************************************************************************
*********************************************************************************************/

clear
save results_mse_$ntk_change.dta, emptyok replace

forvalues dr=1/$draw {

clear
set obs $obs
gen id=_n

/* treatment */

gen temp=runiform()
gen itt=(temp>=0.5)
drop temp

mata: ITT= st_data(., "itt")

sort id 
foreach i in id itt {
	rename `i' `i'_j
}

qui sum
save jjj.dta, replace

foreach i in id itt {
	rename `i'_j `i'_i
}

cross using jjj.dta
capture erase jjj.dta

order id_i id_j itt_i itt_j

/* BINARY NETWORKS */ 

preserve
clear

/* binary network at baseline */

/* Erdős–Rényi random graph*/
nwrandom $obs, prob($density) undirected

stack net1- net$obs, into (link0)
drop _stack
gen serial=_n
sort serial
save temp1.dta, replace
restore

/* intervention-driven network change */

preserve
clear

/* Erdős–Rényi random graph*/
nwrandom $obs, prob($density+$ntk_change) undirected

stack net1- net$obs, into (link_temp)
drop _stack
gen serial=_n
sort serial
save temp2.dta, replace
restore

sort id_i id_j
gen serial=_n

sort serial
merge serial using temp1.dta
drop _merge

sort serial
merge serial using temp2.dta
drop _merge serial

/* binary network at endline */

gen link1=link0
gen temp=$ntk_change
replace link1=link_temp if (itt_i==1 | itt_j==1) & (id_i!= id_j) & temp!=0
drop temp link_temp

/* measurement error 
need to be symmetric --> lower triangular matrix */

preserve

keep id_i id_j link0 link1
drop if id_i==id_j
drop if id_i>id_j

shufflevar link0 link1

/* switch indicator */

local treshold r(p$prob_switch)

gen random1=uniform() 
qui sum random1, detail
gen switch1=(random1<`treshold') 

gen random2=uniform() 
qui sum random2, detail
gen switch2=(random2<`treshold') 

drop link0 link1 random1 random2
ren id_i id1
ren id_j id2
sort id1 id2

save temp3.dta, replace

restore

/* re-merge and symmetrize */

ren id_i id1
ren id_j id2

sort id1 id2 
merge id1 id2 using temp3.dta
drop _merge

ren id1 id_i
ren id2 id_j 

ren id_j id1
ren id_i id2

ren switch1 switch1A
ren switch2 switch2A
ren link0_shuffled link0_shuffledA
ren link1_shuffled link1_shuffledA

sort id1 id2 
merge id1 id2 using temp3.dta
drop _merge

ren id1 id_j
ren id2 id_i 

replace switch1= switch1A if switch1==.
replace switch1= 0 if switch1==.
drop switch1A

replace switch2= switch2A if switch2==.
replace switch2= 0 if switch2==.
drop switch2A

replace link0_shuffled= link0_shuffledA if link0_shuffled==.
replace link0_shuffled= 0 if link0_shuffled==.
drop link0_shuffledA

replace link1_shuffled= link1_shuffledA if link1_shuffled==.
replace link1_shuffled= 0 if link1_shuffled==.
drop link1_shuffledA

sort id_i id_j

capture erase temp1.dta
capture erase temp2.dta
capture erase temp3.dta

/* add measurement errors to binary networks */

replace link0=link0_shuffled if switch1==1
replace link1=link1_shuffled if switch2==1

/* rearrange */

drop link0_shuffled link1_shuffled switch1 switch2
sort id_i id_j

mata: G0 = st_data(., "link0")
mata: G0 = colshape(G0, $obs) 

mata: G1 = st_data(., "link1")
mata: G1 = colshape(G1, $obs) 

/* semi row standardization */

mata: G0_S=rowsum(G0,.)
mata: G0=G0:/G0_S

mata: G1_S=rowsum(G1,.)
mata: G1=G1:/G1_S

mata: mata drop G0_S G1_S

/* edit missing */

mata: G0=editmissing(G0, 0)
mata: G1=editmissing(G1, 0)

/* build delta_G */

mata: deltaG=G1-G0

mata: st_matrix("G0", G0)
mata: st_matrix("G1", G1)
mata: st_matrix("deltaG", deltaG)


/**********************************************************************
 COMPUTE THE DIRECT/ INDIRECT/ TOTAL TREATMENT EFFECT 
 OF THE STATIC AND DYNAMIC PEER EFFECT MODELS FOR THIS DRAW
**********************************************************************/

forvalues sim=1/$simul {

/************************
SAVE ESTIMATED COEFFICIENTS FOR EACH SIMULATION
************************/

use results_coeffs_$ntk_change.dta, clear
keep if sim==`sim'

mata: gamma_ols=st_data(.,"gamma_ols")
mata: gamma_stat=st_data(.,"gamma_stat")
mata: beta1_stat=st_data(.,"beta1_stat")
mata: gamma_dyn=st_data(.,"gamma_dyn")
mata: beta1_dyn=st_data(.,"beta1_dyn")
mata: beta2_dyn=st_data(.,"beta2_dyn")
mata: theta1_hat=st_data(.,"theta1_hat")
mata: theta1=st_data(.,"theta1")

/************************
STATIC MODEL
************************/

mata: A1 = static_TE (beta1_stat, G0, gamma_stat)

mata: D1 = diagonal(A1)
mata: DIRECT_static=mean(D1)

mata: C1 = colsum(A1)'
mata: TOTAL_static=mean(C1)

mata: S1=(C1-D1)
mata: INDIRECT_static=mean(S1)

/************************
DYNAMIC MODEL
* note: estimated theta1_hat
***********************/

mata: A2 = dynamic_TE (beta1_dyn, G0, beta2_dyn, deltaG, gamma_dyn, ITT, theta1_hat)

mata: D2 = diagonal(A2)
mata: DIRECT_dynamic=mean(D2)

mata: C2 = colsum(A2)'
mata: TOTAL_dynamic=mean(C2)

mata: S2=(C2-D2)
mata: INDIRECT_dynamic=mean(S2)

/*******************************************************************
BENCHMARK
********************************************************************/

mata: A3 = dynamic_TE ($beta1, G0, $beta2, deltaG, $gamma, ITT, theta1)

mata: D3 = diagonal(A3)
mata: DIRECT_benchmark=mean(D3)

mata: C3 = colsum(A3)'
mata: TOTAL_benchmark=mean(C3)

mata: S3=(C3-D3)
mata: INDIRECT_benchmark=mean(S3)

/* export */ 

mata: results=(gamma_ols, gamma_stat, beta1_stat, gamma_dyn, beta1_dyn, beta2_dyn, theta1_hat, theta1, DIRECT_static, TOTAL_static, INDIRECT_static, DIRECT_dynamic, TOTAL_dynamic, INDIRECT_dynamic, DIRECT_benchmark, TOTAL_benchmark, INDIRECT_benchmark)
mata: st_matrix("result", results)

clear
svmat result

ren result1 gamma_ols
ren result2 gamma_stat
ren result3 beta1_stat
ren result4 gamma_dyn
ren result5 beta1_dyn
ren result6 beta2_dyn
ren result7 theta1_hat
ren result8 theta1
ren result9 direct_static
ren result10 total_static
ren result11 indirect_static
ren result12 direct_dynamic
ren result13 total_dynamic
ren result14 indirect_dynamic
ren result15 direct_benchmark
ren result16 total_benchmark
ren result17 indirect_benchmark

gen sim=`sim'
gen dr= `dr'

order sim dr
append using results_mse_$ntk_change.dta
sort sim dr
save results_mse_$ntk_change.dta, replace 

}
}

/********************************************************************************************
*********************************************************************************************
6) PRODUCE FINAL STATISTICS
*********************************************************************************************
********************************************************************************************/

/* for model with no PE: compare to OLS coefficient */

gen total_ols=gamma_ols

foreach var1 in ols static dynamic {
foreach var2 in total direct indirect {

    capture confirm variable `var2'_`var1'
    if !_rc {
	
/* MSE */

gen temp=(`var2'_`var1'-`var2'_benchmark)^2
bysort dr: egen MSE_`var2'_`var1' = mean(temp)
drop temp

/* BIAS */

bysort dr: egen temp = mean(`var2'_`var1')
gen BIAS2_`var2'_`var1' = (temp-`var2'_benchmark)^2
drop temp

/* VAR */

bysort dr: egen temp = mean(`var2'_`var1')
gen temp2 = (`var2'_`var1'-temp)^2
bysort dr: egen VAR_`var2'_`var1' = mean(temp2)
drop temp temp2

	} 
else {
}

} 
}

/*display*/ 

format MSE_total_ols- VAR_indirect_dynamic %9.2f
tabstat MSE_total_ols- VAR_indirect_dynamic, stats(mean) f col(stat)



